Reading and tidying data

# VSG seq results
vsg_seq_results <- read_csv("data/Beaver_raw_vsg_seq_results_WT.csv")

# Sequencing alignment data
alignment_data <- read_csv("data/Beaver_VSG_seq_alignment_data.csv")

# parasitemia count data
parasitemia <- read_csv("data/parasitemia.csv") 

Analzying and tidying tissue parasitemia qPCR data

# need my qpcr cleaner package (this is specific to the output of out qpcr machine)
#install_github("ABeav/qPCRr")
library(qPCRr)

# loading in each raw data frame 
parasitemia_5 <- clean_qPCR_files_check_NTC("data/parasite_load_qpcr_data/raw_qpcr_data/", "abex7_parasitemia_5.xls", undetermined = "remove")
## [1] "Warning: Undetermined removed"
## [1] "Congrats! No amplification in your NTC & your DNase treatment looks good."
parasitemia_6 <- clean_qPCR_files_check_NTC("data/parasite_load_qpcr_data/raw_qpcr_data/", "abex7_parasitemia_6.xls", undetermined = "remove")
## [1] "Warning: Undetermined removed"
## [1] "Congrats! No amplification in your NTC & your DNase treatment looks good."
parasitemia_7 <- clean_qPCR_files_check_NTC("data/parasite_load_qpcr_data/raw_qpcr_data/", "abex7_parasitemia_7.xls", undetermined = "remove")
## [1] "Warning: Undetermined removed"
## [1] "Congrats! No amplification in your NTC & your DNase treatment looks good."
parasitemia_8 <- clean_qPCR_files_check_NTC("data/parasite_load_qpcr_data/raw_qpcr_data/",
                                            "abex7_parasitemia_8.xls",
                                            undetermined = "remove")
## [1] "Warning: Undetermined removed"
## [1] "Congrats! No amplification in your NTC & your DNase treatment looks good."
parasitemia_9 <- clean_qPCR_files_check_NTC("data/parasite_load_qpcr_data/raw_qpcr_data/",
                                            "abex7_parasitemia_9.xls",
                                            undetermined = "remove")
## [1] "Warning: Undetermined removed"
## [1] "Congrats! No amplification in your NTC & your DNase treatment looks good."
parasitemia_11 <- clean_qPCR_files_check_NTC("data/parasite_load_qpcr_data/raw_qpcr_data/",
                                            "abex7_parasitemia_11.xls",
                                            undetermined = "remove")
## [1] "Warning: Undetermined removed"
## [1] "Congrats! No amplification in your NTC & your DNase treatment looks good."
predict_5 <- tissue_parasitemia_predictor("data/parasite_load_qpcr_data/raw_qpcr_data/",
                                              "abex7_parasitemia_5.xls",
                                    "data/parasite_load_qpcr_data/parasite_standard_numbers.csv",
                                    "data/parasite_load_qpcr_data/tissue_sample_multipliers.csv")
## [1] "Warning: Undetermined removed"
## [1] "Congrats! No amplification in your NTC & your DNase treatment looks good."
predictions_5 <- predict_5[[1]]
standards_5 <- predict_5[[2]] %>%
  mutate(exp = "5")

predict_6 <- tissue_parasitemia_predictor("data/parasite_load_qpcr_data/raw_qpcr_data/",
                                              "abex7_parasitemia_6.xls",
                                    "data/parasite_load_qpcr_data/parasite_standard_numbers.csv",
                                    "data/parasite_load_qpcr_data/tissue_sample_multipliers.csv")
## [1] "Warning: Undetermined removed"
## [1] "Congrats! No amplification in your NTC & your DNase treatment looks good."
predictions_6 <- predict_6[[1]]
standards_6 <- predict_6[[2]] %>%
  mutate(exp = "6")

predict_7 <- tissue_parasitemia_predictor("data/parasite_load_qpcr_data/raw_qpcr_data/",
                                              "abex7_parasitemia_7.xls",
                                  "data/parasite_load_qpcr_data/parasite_standard_numbers.csv",
                                    "data/parasite_load_qpcr_data/tissue_sample_multipliers.csv")
## [1] "Warning: Undetermined removed"
## [1] "Congrats! No amplification in your NTC & your DNase treatment looks good."
predictions_7 <- predict_7[[1]]
standards_7 <- predict_7[[2]] %>%
  mutate(exp = "7")

predict_8 <- blood_parasitemia_predictor("data/parasite_load_qpcr_data/raw_qpcr_data/",
                                            "abex7_parasitemia_8.xls",
                                  "data/parasite_load_qpcr_data/parasite_standard_numbers.csv",
                                  "data/parasite_load_qpcr_data/tissue_sample_multipliers.csv")
## [1] "Warning: Undetermined removed"
## [1] "Congrats! No amplification in your NTC & your DNase treatment looks good."
predictions_8 <- predict_8[[1]]
standards_8 <- predict_8[[2]] %>%
  mutate(exp = "8")

predict_9 <- blood_parasitemia_predictor("data/parasite_load_qpcr_data/raw_qpcr_data/",
                                             "abex7_parasitemia_9.xls",
                                    "data/parasite_load_qpcr_data/parasite_standard_numbers.csv",
                                    "data/parasite_load_qpcr_data/tissue_sample_multipliers.csv")
## [1] "Warning: Undetermined removed"
## [1] "Congrats! No amplification in your NTC & your DNase treatment looks good."
predictions_9 <- predict_9[[1]]
standards_9 <- predict_9[[2]] %>%
  mutate(exp = "9")

# There is no parasitemia_10 because it didnt work very well so I redid those samples to make parasitemia_11

predict_11 <- blood_parasitemia_predictor("data/parasite_load_qpcr_data/raw_qpcr_data/",
                                            "abex7_parasitemia_11.xls",
                                            "data/parasite_load_qpcr_data/parasite_standard_numbers.csv", 
                                            "data/parasite_load_qpcr_data/tissue_sample_multipliers.csv")
## [1] "Warning: Undetermined removed"
## [1] "Congrats! No amplification in your NTC & your DNase treatment looks good."
predictions_11 <- predict_11[[1]]
standards_11 <- predict_11[[2]] %>%
  mutate(exp = "11")

# Tidy up the data frames and concatenate them 
tissue_predictions <- rbind(predictions_5, predictions_6, predictions_7) %>%
  separate(sample_name, c("mouse", "day", "tissue"), 
             sep = " ", remove = FALSE) 

replacement_samples <- predictions_11$sample_name

blood_predictions <- rbind(predictions_8, predictions_9) %>% 
  filter(!sample_name %in% replacement_samples) %>%
  rbind(predictions_11) %>%
  separate(sample_name, c("mouse", "day", "tissue"), 
             sep = " ", remove = FALSE) 

all_predictions <- full_join(tissue_predictions, blood_predictions)

Supplementary figure 1b

# Supplementary figure 1b 
tissue_parasite_load_dat  <- all_predictions %>%
  mutate(mouse = factor(mouse, levels = c("m12", "m27","m32","m33","m30","m36",
                                          "m4","m6","m2","m3","m34","m35"))) %>%
  mutate(parasites_per_gram = parasites_per_mg*1000) %>%
  group_by(tissue, day) %>%
  summarize(mean = mean(parasites_per_gram), 
            stderr= sd(parasites_per_gram),
            parasites_per_gram=parasites_per_gram,
            mass = mass,
            total_parasites_in_sample = total_parasites_in_sample, 
            parasites_per_mg = parasites_per_mg, 
            parasitemia_count = parasitemia_count,
            mouse=mouse, ct_mean = ct_mean) %>%
  mutate(day = case_when(day == "d6" ~ 6, 
                         day == "d10" ~ 10,
                         day == "d14" ~ 14)) %>%
  mutate(tissue_final = case_when(tissue == "ear" ~ "skin",
                            tissue == "gon" ~ "gon. fat",
                            tissue == "sc" ~ "s.c. fat",
                            TRUE ~ tissue)) %>%
  mutate(mass_of_organ = mass) %>%
  mutate(mass_of_organ = case_when(tissue == "blood" ~ 1500, 
                                   tissue == "ear" ~ 2730,
                                   TRUE ~ mass_of_organ)) %>%
  mutate(parasites_per_organ = mass_of_organ*parasites_per_mg)
  
Supplementary_figure_1b <- tissue_parasite_load_dat %>%
  filter(tissue != "blood") %>%
  ggplot(aes(x= day, y= parasites_per_gram)) +
  geom_boxplot(aes(y= parasites_per_gram, group = day)) +
  geom_point(aes(y = parasites_per_gram), size = 1) +
  theme(axis.text.x = element_text(color = "black", angle = 45, hjust = 1)) +
  scale_y_log10(breaks = c(10^4,10^5,10^6,10^7,10^8, 10^9),
                limits= c(10^4, 10^9),
                labels = trans_format("log10", math_format(10^.x))) +
  facet_grid(~tissue_final, scales = "free_x") +
  theme_pubr() +
  ylab("Parasites per gram of tissue") +
  scale_x_continuous(breaks = c(6,10,14)) +
  theme(panel.grid.major.y = element_line(linetype = 1, color = "grey")) +
  xlab("Time post infection (days)") 

Supplementary_figure_1b

#ggsave("Supplementary_figure_1b.pdf",Supplementary_figure_1b,
#       device = "pdf", width = 6, height = 4, units = "in")

#write.csv(all_predictions, "parasitemia_predictions.csv")
total_parasite_load_graph <- tissue_parasite_load_dat %>%
  ggplot(aes(x= day, y= parasites_per_organ)) +
  geom_boxplot(aes(y= parasites_per_organ, group = day)) +
  geom_point(aes(y = parasites_per_organ), size = 1) +
  theme(axis.text.x = element_text(color = "black", angle = 45, hjust = 1)) +
  scale_y_log10(breaks = c(10^3,10^4,10^5,10^6,10^7,10^8, 10^9),
                limits= c(10^3, 10^9.5),
                labels = trans_format("log10", math_format(10^.x))) +
  facet_grid(~tissue_final, scales = "free_x") +
  theme_pubr() +
  ylab("Total parasite load") +
  scale_x_continuous(breaks = c(6,10,14)) +
  theme(panel.grid.major.y = element_line(linetype = 1, color = "grey")) +
  xlab("Time post infection (days)") 

total_parasite_load_graph

ggsave("figures/figure_S1c.pdf",total_parasite_load_graph,
       device = "pdf", width = 6, height = 4, units = "in")

Supplementary figure 1c

plate_list <- tibble(plate = 1:6, exp = c("5", "6", "7", "8", "9", "11"))

all_standards <- rbind(standards_5, standards_6, standards_7, 
                       standards_8, standards_9, standards_11) %>%
  left_join(plate_list)

all_standards_graph <- all_standards %>%
  mutate(plate = as.character(plate)) %>%
  ggplot(aes(x = log10(parasites), y = ct_mean, color = plate)) + 
  geom_line() +
  geom_point() +
  theme_pubr(legend = "right", base_size = 22) +
  stat_cor(aes(label = paste(..rr.label.., ..p.label.., sep = "*`,`~")),
           label.x.npc = "middle") +
  scale_color_manual(values = color_palette_12) +
  geom_smooth(method = "lm", se = F) 
 
all_standards_graph

#ggsave("figures/figure_S1d.pdf", all_standards_graph,
#       device = "pdf", width = 8, height = 6, units = "in")
# Cleaned tissue parasite load data (qPCR)
parasite_load_qPCR <- read_csv("data/parasite_load_qpcr_data/parasite_load_qPCR.csv")
# Tidying tissue names to be consistent and renaming VSGs to cluster names
vsg_seq_results_tidy <- vsg_rename(vsg_seq_results, 
                                   "data/cluster_reference_table.txt") %>% 
  mutate(tissue = case_when(tissue == "Brain" ~ "brain",
                            tissue == "Ear" ~ "ear",
                            tissue == "Gon" ~ "gon",
                            tissue == "Heart" ~ "heart",
                            tissue == "Lung" ~ "lung",
                            tissue == "Sub" ~ "sub",
                            tissue == "gonfat" ~ "gon",
                            tissue == "subcu" ~ "sub", 
                            tissue == "lungs" ~ "lung",
                            tissue == "GonFat" ~ "gon", 
                            tissue == "SubCu" ~ "sub",
                            TRUE ~ tissue)) %>%
  mutate(tissue_final = case_when(tissue == "ear" ~ "skin",
                            tissue == "gon" ~ "gon. fat",
                            tissue == "sub" ~ "s.c. fat",
                            TRUE ~ tissue)) %>%
  select(-...1)

# Filtering VSGs that represented less than 0.01% of parasites in a sample
results <-  vsg_seq_results_tidy %>%
  filter(Percent > 0.01) %>% 
  mutate(day = factor(day, levels = c("D6", "D10", "D14"))) %>%
  group_by(samplename) %>%
  distinct(cluster, .keep_all = TRUE) %>%
  ungroup()

# Getting rid of samples with less than 100,000 reads aligning
bad_alignment <- alignment_data %>%
  filter(reads_aligned < 100000)

results <- anti_join( results, bad_alignment, by = c("mouse", "tissue"))

# Filter VSGs that represent less than 1 parasite based on Percentage and parasitemia data
parasite_numbers <- parasite_load_qPCR %>% 
  mutate(tissue = case_when(tissue == "sc" ~ "sub", TRUE ~ tissue)) %>%
  mutate(day = str_to_upper(day),
         mouse = str_to_upper(mouse)) %>%
  select(mouse,day,tissue, total_parasites_in_sample)

results <- left_join(results, parasite_numbers) %>%
  mutate(parasites = (Percent/100)*total_parasites_in_sample)

vsgs_tossed_from_parasitemia<- results %>%
  filter(parasites < 1) %>%
  group_by(samplename) %>%
  count()

results <- results %>%
  filter(parasites > 1 | is.na(parasites) == TRUE)

# Tidying and giving mice new figure names (1-12)

results <- results %>%
  mutate(day = factor(day, levels = c("D6", "D10", "D14"))) %>%
  mutate(mouse_final = case_when(mouse == "M12" ~ "1", 
                                 mouse == "M27" ~ "2",
                                 mouse == "M32" ~ "3",
                                 mouse == "M33" ~ "4",
                                 mouse == "M30" ~ "5",
                                 mouse == "M36" ~ "6",
                                 mouse == "M4" ~ "7",
                                 mouse == "M6" ~ "8",
                                 mouse == "M2" ~ "9",
                                 mouse == "M34" ~ "10",
                                 mouse == "M35" ~ "11",
                                 mouse == "M3" ~ "12")) %>%
  mutate(mouse_final = as.numeric(mouse_final))

# Need one data frame with all VSGs represented in each space even if its not detected there for plotting purposes. 
results_all_combos <- vsg_expand(vsg_seq_results_tidy, "samplename", "cluster") %>%
  separate(samplename, c("mouse", "day", "tissue"), sep = "_", remove = FALSE) %>%
   mutate(tissue = case_when(tissue == "Brain" ~ "brain",
                            tissue == "Ear" ~ "ear",
                            tissue == "Gon" ~ "gon",
                            tissue == "Heart" ~ "heart",
                            tissue == "Lung" ~ "lung",
                            tissue == "Sub" ~ "sub",
                            tissue == "gonfat" ~ "gon",
                            tissue == "subcu" ~ "sub", 
                            tissue == "lungs" ~ "lung",
                            tissue == "GonFat" ~ "gon", 
                            tissue == "SubCu" ~ "sub",
                            TRUE ~ tissue)) %>%
  mutate(tissue_final = case_when(tissue == "ear" ~ "skin",
                            tissue == "gon" ~ "gon. fat",
                            tissue == "sub" ~ "s.c. fat",
                            TRUE ~ tissue)) %>%
  anti_join(bad_alignment, by = c("mouse", "tissue"))

Main figures

Figure 1

A

top_11_VSGs <- results %>% 
  select(day, tissue, cluster, Percent) %>%
  select(-tissue, -day) %>%
  group_by(cluster) %>%
  summarize(sum = max(Percent)) %>%
  slice_max(sum, n = 11) %>%
  mutate(color = cluster,
         top_11_whole = "top") 

top_VSGs <- results %>% 
  filter(day == "D14" ) %>%
  select(day, tissue, cluster, parasites) %>%
  select(-tissue, -day) %>%
  group_by(cluster) %>%
  summarize(parasites_sum = sum(parasites, na.rm = TRUE)) %>%
  slice_max(parasites_sum, n = 40) 

common_D14_eatro1125_vsgs <- top_VSGs %>%
  left_join(results) %>%
  ungroup() %>%
  group_by(cluster) %>%
  filter(assembled_VSG_length == max(assembled_VSG_length)) %>%
  filter(pct_id_vs_match == max(pct_id_vs_match)) %>%
  select(cluster, hit_VSG, pct_id_vs_match, pct_id_vs_query, parasites_sum) %>%
  ungroup() %>%
  distinct() %>%
  filter(pct_id_vs_query > 90) %>%
  filter(pct_id_vs_match > 90) %>%
  group_by(hit_VSG) %>%
  filter(pct_id_vs_query == max(pct_id_vs_query)) %>%
  distinct() %>%
  mutate(hit_VSG = fct_reorder(hit_VSG, parasites_sum, .desc = T))
  
  #mutate(color = cluster,
   #      top_11_whole = "top") 
  
#write_csv(common_D14_eatro1125_vsgs, "common_D14_eatro1125_vsgs.csv")

stacked_bar_plot_dat <- results %>%
  mutate(mouse = factor(mouse, levels = c("M12","M27", "M32", 
                                          "M33", "M30", "M36", 
                                          "M4", "M6", "M2",  
                                          "M34", "M35", "M3" ))) %>%
  left_join(top_11_VSGs) %>%
  mutate(color = case_when(is.na(color) == TRUE ~ "other",
                            TRUE ~ color )) %>%
  mutate(day_tis = case_when(tissue_final == "blood" ~ str_c(day, tissue_final, sep = " "), 
                             TRUE ~ tissue_final)) %>%
  mutate(day_tis = factor(day_tis, levels = c("D6 blood", "D10 blood", "D14 blood",
                                              "brain","skin","gon. fat","heart","lung","s.c. fat"))) %>%
  mutate(color = paste("VSG", color, sep = " "), 
         color = case_when( color == "VSG Cluster 56" ~ "AnTat1.1",
                              color == "VSG Cluster 1830" ~ "VSG-421",
                              TRUE ~ color)) %>%
  mutate(color = fct_reorder(color, sum , .desc = TRUE, .na_rm = F),
         color = fct_relevel(color, "VSG Cluster 294", after = 2), 
         mouse_final_2 = paste("Mouse", mouse_final, sep = " "),
         mouse_final_2 = fct_reorder(mouse_final_2, mouse_final)) %>% 
  group_by(mouse_final_2, day, tissue_final) %>%
  mutate(percent_final = (Percent/sum(Percent))*100)

stacked_bar_plot <- stacked_bar_plot_dat %>%
  ggplot(aes(x = day_tis, y = percent_final, fill = color)) +
  geom_col(color = "black") +
  facet_wrap("mouse_final_2", nrow = 3) + 
  theme_pubr(legend = "right", base_size =20) +
  theme(axis.text.x = element_text(angle = 90, size = 8)) +
  scale_fill_manual(values = color_palette_12) +
  xlab("") +
  ylab("Percent of expression")
stacked_bar_plot 

#ggsave("figures/figure_1A.pdf", stacked_bar_plot,
#       device = "pdf", width = 10, height = 6, units = "in")

B

D10_mouse_VSG_counts <- results %>%
  filter(mouse_final == "5" | mouse_final == "6" | mouse_final == "7" | mouse_final == "8",
         day == "D10") %>%
  mutate(space = case_when(tissue == 'blood' ~ 'blood only', TRUE ~ 'tissues only')) %>%
  group_by(space) %>%
  distinct(cluster, mouse_final, day) %>%
  ungroup() %>%
  group_by(mouse_final, cluster) %>%
  add_tally() %>%
  mutate(space = case_when(n == 2 ~ 'shared', TRUE ~ space)) %>%
  ungroup() %>% 
  group_by(mouse_final, space) %>%
  distinct(cluster, space) %>%
  count(name = "VSGs",sort = TRUE) %>%
  ungroup() %>%
  group_by(mouse_final) %>%
  mutate(total_vsgs = sum(VSGs)) %>%
  ungroup() %>%
  mutate(day = "Day 10")

D6_mouse_VSG_counts <- results %>%
  filter(mouse_final == "1" | mouse_final == "2" | mouse_final == "3" 
         | mouse_final == "4") %>% 
  mutate(space = case_when(tissue == 'blood' ~ 'blood only', TRUE ~ 'tissues only')) %>%
  group_by(space) %>%
  distinct(cluster, mouse_final, day) %>%
  ungroup() %>%
  group_by(mouse_final, cluster) %>%
  add_tally() %>%
  mutate(space = case_when(n == 2 ~ 'shared', TRUE ~ space)) %>%
  ungroup() %>% 
  group_by(mouse_final, space) %>%
  distinct(cluster, space) %>%
  count(name = "VSGs",sort = TRUE) %>%
  ungroup() %>%
  group_by(mouse_final) %>%
  mutate(total_vsgs = sum(VSGs)) %>%
  ungroup() %>%
  mutate(day = "Day 6")

D14_mouse_VSG_counts <- results %>%
  filter(mouse_final == "9" | mouse_final == "10" | mouse_final == "11" | mouse_final == "12",
         day == "D14") %>%
  mutate(space = case_when(tissue == 'blood' ~ 'blood only', TRUE ~ 'tissues only')) %>%
  group_by(space) %>%
  distinct(cluster, mouse_final, day) %>%
  ungroup() %>%
  group_by(mouse_final, cluster) %>%
  add_tally() %>%
  mutate(space = case_when(n == 2 ~ 'shared', TRUE ~ space)) %>%
  ungroup() %>% 
  group_by(mouse_final, space) %>%
  distinct(cluster, space) %>%
  count(name = "VSGs",sort = TRUE) %>%
  ungroup() %>%
  group_by(mouse_final) %>%
  mutate(total_vsgs = sum(VSGs)) %>%
  ungroup() %>%
  mutate(day = "Day 14")

mouse_VSGs_count <- D6_mouse_VSG_counts %>%
  bind_rows(D10_mouse_VSG_counts, D14_mouse_VSG_counts) %>%
  mutate(mouse_final = as.character(mouse_final)) %>%
  mutate(
         day = factor(day, levels = c("Day 6", "Day 10", "Day 14")),
         percent_vsgs = (VSGs/total_vsgs)*100) %>%
  mutate(mouse_final = as.numeric(mouse_final))

mouse_VSGs_count_summary <- mouse_VSGs_count %>%
  ungroup() %>%
  group_by(space, mouse_final) %>%
  summarize(mean(percent_vsgs))

mouse_VSGs_count_plot <- mouse_VSGs_count %>%
  ggplot(aes(x = mouse_final, y = percent_vsgs))+
  geom_col(aes(fill = space))+
  facet_wrap("day", scales = "free_x") + 
  ylab("Percent of VSGs") + 
  xlab("Mouse") +
  theme_pubr(base_size = 20) +
  theme(legend.position = "top") +
  scale_fill_manual(values = color_palette_3)

mouse_VSGs_count_plot

ggsave("figures/figure_1B.pdf", mouse_VSGs_count_plot,
       device = "pdf", width = 8, height = 6, units = "in")

C

counts_per_sample <- results %>%
  vsg_count(day, mouse_final, tissue_final) %>%
  mutate(space = case_when(tissue_final != "blood" ~ "tissues", 
                           TRUE ~ tissue_final)) 

test_of_normality <- counts_per_sample %>% 
  group_by(day) %>% 
  shapiro_test(vsg_count)

counts_by_space_t_test_dat <- counts_per_sample %>% 
  group_by(day) %>%
  pairwise_t_test(vsg_count ~ space, p.adjust.method = "BH") %>%
  add_xy_position(x = "day", dodge = 0.75)

counts_by_space_plot <- counts_per_sample %>%
  ggplot(aes(x = day, y = vsg_count, group = interaction(day, space))) + 
  geom_boxplot(aes(fill = space), alpha = .5, width =.75) +
  geom_point(aes(color = space),size = 2.5, position = position_jitterdodge(.1)) + 
  theme_pubr(legend = "right", base_size = 20) +
  xlab("Time post infection (days)") +
  ylab("Number of VSGs") +
  scale_fill_manual(values = color_palette_12) +
  scale_color_manual(values = color_palette_12) +
  add_pvalue(counts_by_space_t_test_dat, 
             label = "p.adj.signif",
             xmin = "xmin", 
             xmax = "xmax") +
  scale_y_continuous(limits = c(0,350))
counts_by_space_plot

ggsave("figures/figure_1C.pdf", counts_by_space_plot,
       device = "pdf", width = 8, height = 6, units = "in")

D

counts_by_sample_dunn_test<- counts_per_sample %>%
  group_by(day) %>%
  dunn_test(vsg_count ~ tissue_final, p.adjust.method = "BH" ) %>%
  add_xy_position(step.increase = 0.05)

counts_by_sample_norm_test <- counts_per_sample %>%
  mutate(test_tissue = paste(day, tissue_final, sep = "_")) %>%
  filter(test_tissue != "D10_brain") %>%
  select(-test_tissue) %>%
  group_by(day, tissue_final) %>%
  shapiro_test(vsg_count)

counts_by_sample_boxplot <- results %>%
  filter(mode == "IV",
         strain == "WT") %>%
  vsg_count(day, mouse, tissue_final) %>%
  ggplot(aes(x = tissue_final, y = vsg_count)) + 
  facet_wrap( vars(day))  +
  geom_boxplot(outlier.size = 0) +
  geom_dotplot(binaxis='y', stackdir= "center", stackratio = .8, dotsize=.32) +
  xlab("") +
  ylab("Number of VSGs") +
  theme_pubr(base_size = 20) +
  theme(axis.text.x = element_text(angle = 90,
                                 hjust = 1, 
                                 size = 12, 
                                 color = 'black'),
        legend.position = "right") +
  stat_pvalue_manual(counts_by_sample_dunn_test, 
             label = "p.adj.signif",
             xmin = "xmin", 
             xmax = "xmax", 
             hide.ns = TRUE,
             size = 3.8, 
             tip.length = 0.02,
             bracket.nudge.y = -30)
counts_by_sample_boxplot

ggsave("figures/figure_1D.pdf", counts_by_sample_boxplot,
       device = "pdf", width = 8, height = 6, units = "in")

Figure 2

B

vsg_percent_unique <- results %>%
  select(cluster, mouse_final, day, tissue_final) %>%
  group_by(mouse_final, cluster) %>%
  add_tally() %>%
  filter(n == 1) %>% 
  group_by(mouse_final, day, tissue_final) %>%
  count() %>%
  rename(number_unique = n) %>%
  right_join(counts_per_sample) %>%
  filter(mouse_final %in% day10_mice & day == "D10" |
           mouse_final %in% day14_mice & day == "D14") %>%
  mutate(percent_unique = (number_unique / vsg_count)*100,
         ratio_unique = number_unique / vsg_count,
         not_unique = vsg_count - number_unique)%>%
  mutate(percent_unique= replace_na(percent_unique, 0)) 

vsg_percent_unique_norm_test <- vsg_percent_unique %>%
  mutate(test_tissue = paste(day, tissue_final, sep = "_")) %>%
  filter(test_tissue != "D10_brain") %>%
  select(-test_tissue) %>%
  group_by(day, tissue_final) %>%
  shapiro_test(percent_unique)

vsg_percent_unique_t_test_dat <- vsg_percent_unique %>%
  group_by(day) %>%
  pairwise_t_test(percent_unique ~ tissue_final, p.adjust.method = "BH") %>%
  add_xy_position()

vsg_percent_unique_graph <- vsg_percent_unique %>% 
  ggplot(aes(x = tissue_final, y = percent_unique)) +
  geom_boxplot(outlier.size = 0) +
  geom_dotplot(binaxis='y', stackdir= "center", stackratio = .8, dotsize=.4) + 
  facet_wrap("day") +
  xlab("") +
  ylab("Percent of VSGs unique to only one space within each mouse") +
  theme_pubr(base_size = 20) +
  #scale_y_continuous(limits = c(0,100)) +
  theme(axis.text.x = element_text(angle = 90,
                                 hjust = 1, 
                                 size = 12, 
                                 color = 'black'),
        legend.position = "right",
        legend.background = element_rect(fill = NULL, color = NULL)) +
 stat_pvalue_manual(vsg_percent_unique_t_test_dat, 
             label = "p.adj.signif",
             xmin = "xmin", 
             xmax = "xmax", 
             hide.ns = TRUE)

vsg_percent_unique_graph

ggsave("figures/figure_2B.pdf", vsg_percent_unique_graph,
       device = "pdf", width = 8, height = 6, units = "in")

C

next_dominant_vsgs <- results %>%
  filter(cluster == "Cluster 294" | cluster == "Cluster 1831" | 
           cluster == "Cluster 504" | cluster == "Cluster 4")%>% 
  select(mouse, cluster, mouse_final) %>%
  merge(results_all_combos, by = c('mouse','cluster')) %>%
  mutate(Percent = case_when(Percent < .01 ~ 0,
                             TRUE~ Percent)) %>%
  mutate(Percent = Percent + .001) %>%
  mutate(day = factor(day, levels = c("D6", "D10", "D14"))) %>%
  distinct()

early_cluster_vsg_expression <- next_dominant_vsgs %>%
    mutate(tissue_final = case_when(tissue == "ear" ~ "skin",
                            tissue == "gon" ~ "gon. fat",
                            tissue == "sub" ~ "s.c. fat",
                            TRUE ~ tissue)) %>%
  filter(mouse %in% day6_mice_IV_wt & day == "D6" |
         mouse %in% day10_mice_IV_wt & day == "D10"|
         mouse %in% day14_mice_IV_wt & day == "D14") %>%
  separate(day, c("day1","day_num"), sep = "D", convert = TRUE, remove = FALSE) %>%
  mutate(day_num_char = as.character(day_num)) %>%
  mutate(day_num_char = factor(day_num_char, levels = c("6","10", "14"))) %>%
  select(-day1) %>% 
  filter(cluster != "Cluster 4") %>%
  mutate(space = case_when(tissue == "blood" ~ "blood", 
                           TRUE ~ "tissues"),
         cluster = factor(cluster, levels = c("Cluster 294", "Cluster 504", 
                                              "Cluster 1831"))) %>%
  ggplot(aes(x = day_num_char, y = log10(Percent),
             group = interaction(day_num_char,space))) +
  geom_boxplot(aes(fill = space), alpha = .5, width = .75) +
  geom_point(aes(color = tissue_final), size = 2, 
             position = position_jitterdodge(jitter.width = .1)) +
  theme_pubr(legend = "right", base_size = 20) +
  facet_wrap("cluster", nrow =1, shrink = F) +
  xlab("Time post infection (days)") +
  ylab("Percent of parasites expressing the starting VSG") +
  scale_color_manual(values = color_palette_12) +
  scale_fill_manual(values = color_palette_12) +
  scale_y_continuous(limits = c(-3,2)) +
  geom_hline(yintercept = -2, linetype = 2) +
  scale_y_continuous(labels = c("n.d.", "0.01%","0.1%", "1.0%", "10%", "100%")) +
  guides(color = guide_legend(title = "Tissue"),
         fill = guide_legend(title = "Space"))
early_cluster_vsg_expression

ggsave("figures/figure_2C.pdf", early_cluster_vsg_expression,
       device = "pdf", width = 8, height = 6, units = "in")

Figrue 3

A

starting_vsgs <- results %>%
  filter(day == "D6", tissue == "blood") %>%
  vsg_max(mouse) %>% 
  select(mouse, cluster, mouse_final) %>%
  merge(results_all_combos, by = c('mouse','cluster')) %>%
  mutate(Percent = case_when(Percent < .01 ~ 0,
                             TRUE~ Percent)) %>%
  mutate(Percent = Percent + .001) %>%
  mutate(day = factor(day, levels = c("D6", "D10", "D14"))) %>%
  mutate(tissue_final = case_when(tissue == "ear" ~ "skin",
                            tissue == "gon" ~ "gon. fat",
                            tissue == "sub" ~ "s.c. fat",
                            TRUE ~ tissue))

starting_vsg_espression_3_dat <- starting_vsgs %>%
  filter(mouse %in% day6_mice_IV_wt & day == "D6" |
         mouse %in% day10_mice_IV_wt & day == "D10"|
         mouse %in% day14_mice_IV_wt & day == "D14") %>%
  separate(day, c("day1","day_num"), sep = "D", convert = TRUE, remove = FALSE) %>%
  mutate(day_num_char = as.character(day_num)) %>%
  mutate(day_num_char = factor(day_num_char, levels = c("6","10", "14"))) %>%
  select(-day1) %>% 
  mutate(space = case_when(tissue == "blood" ~ "blood", 
                           TRUE ~ "tissues")) %>%
  mutate(expression = case_when(Percent == .001 ~ "N", 
                                TRUE ~ "Y"),
         day = factor(day_num, levels = c("6", "10", "14"))) 


wilcox_test_starting_vsgs <- starting_vsg_espression_3_dat %>%
  mutate(Percent = Percent - .001) %>%
  group_by(day) %>%
  pairwise_wilcox_test(Percent ~ space, p.adjust.method = "BH") %>%
  add_xy_position() %>%
  mutate(y.position = log10(y.position)) %>%
  mutate(xmin = case_when(day == "6" ~ .75, 
                          day == "10" ~ 1.75, 
                          day == "14" ~ 2.75)) %>%
  mutate(xmax = case_when(day == "6" ~ 1.25, 
                          day == "10" ~ 2.25,
                          day == "14" ~ 3.25)) %>%
  mutate(y.position = case_when(day == "6" ~ 2.4, 
                          day == "10" ~ 2.2,
                          day == "14" ~ .7))

starting_vsg_espression_3 <- starting_vsg_espression_3_dat %>%
  ggplot(aes(x = day, y = log10(Percent), group = interaction(day, space))) +
  geom_boxplot(aes(fill = space), alpha = .5, width = 1) +
  geom_point(aes(color = tissue_final), size = 2.5, position =
               position_jitterdodge(jitter.width = .1, dodge.width = 1)) +
  theme_pubr(legend = "right", base_size = 20) +
  xlab("Time post infection (days)") +
  ylab("Percent of parasites\nexpressing the starting VSG") +
  scale_color_manual(values = color_palette_12) +
  scale_fill_manual(values = color_palette_12) +
  stat_pvalue_manual(wilcox_test_starting_vsgs, 
             label = "p.adj.signif",
             xmin = "xmin", 
             xmax = "xmax") +
  ylim(-3,2) +
  geom_hline(yintercept = -2, linetype = 2) +
  scale_y_continuous(labels = c("n.d.", "0.01%","0.1%", 
                                "1.0%", "10%", "100%", "1000%")) +
  guides(color = guide_legend(title = "Tissue"),
         fill = guide_legend(title = "Space"))
starting_vsg_espression_3

ggsave("figures/figure_3A.pdf", starting_vsg_espression_3,
       device = "pdf", width = 8, height = 6, units = "in")

B

#Facs data

Figure 4

# Loading AID-/- sequencing data
# Data from the first flow cell, which was with some of the WT samples
AID_raw_results_1 <- read_csv("data/AID data/Beaver_raw_vsg_seq_results_AID_1.csv") %>%
  vsg_rename("data/cluster_reference_table.txt") %>% 
  mutate(tissue = case_when(tissue == "Brain" ~ "brain",
                            tissue == "Ear" ~ "ear",
                            tissue == "Gon" ~ "gon",
                            tissue == "Heart" ~ "heart",
                            tissue == "Lung" ~ "lung",
                            tissue == "Sub" ~ "sub",
                            tissue == "gonfat" ~ "gon",
                            tissue == "subcu" ~ "sub", 
                            tissue == "lungs" ~ "lung",
                            tissue == "GonFat" ~ "gon", 
                            tissue == "SubCu" ~ "sub",
                            TRUE ~ tissue)) %>%
  mutate(tissue_final = case_when(tissue == "ear" ~ "skin",
                            tissue == "gon" ~ "gon. fat",
                            tissue == "sub" ~ "s.c. fat",
                            TRUE ~ tissue)) %>%
  select(-mode,
         -...1) 

# Data from the second AID flow cell
AID_raw_results_2 <- read_csv("data/AID data/Beaver_raw_vsg_seq_results_AID_2.txt") %>%
  mutate(strain = "AID",
         experiment = "2") %>% 
  vsg_rename("data/AID data/AID_cluster_reference_table.txt") %>%
  mutate(tissue = case_when(tissue == "ear" ~ "skin",
                            tissue == "gon" ~ "gon. fat",
                            tissue == "gonadal" ~ "gon. fat",
                            tissue == "lungs" ~ "lung",
                            tissue == "sc" ~ "s.c. fat",
                            tissue == "scfat" ~ "s.c. fat",
                            TRUE ~ tissue),
         tissue_final = tissue) %>%
  mutate(day = toupper(day))

# Combinig all AID VSG seq results 
AID_raw_results <- rbind(AID_raw_results_1, AID_raw_results_2)

# Filtering out VSGs less than 0.01% of parasites in a sample
AID_results <- AID_raw_results %>% 
  filter(Percent > 0.01) %>% 
  mutate(day = factor(day, levels = c("D6", "D10", "D14"))) %>%
  group_by(samplename) %>%
  distinct(cluster, .keep_all = TRUE) %>%
  ungroup() %>%
  mutate(mouse_final = paste(mouse, strain, sep = "_"))

AID_results_all_combos <- vsg_expand(AID_results, "samplename", "cluster") %>%
  separate(samplename, c("mouse", "day", "tissue"), sep = "_", remove = FALSE) %>%
  mutate(strain = "AID")

# Combining WT and AID data
wt_results <- results %>%
  select(-mode, -total_parasites_in_sample, -parasites) %>%
  mutate(experiment = "1")

wt_results_all_combos <-  vsg_expand(wt_results, "samplename", "cluster") %>%
  separate(samplename, c("mouse", "day", "tissue"), sep = "_", remove = FALSE) %>%
  mutate(strain = "WT") %>%
  anti_join(bad_alignment, by = c("mouse", "tissue"))

wt_and_aid_results <- rbind(AID_results, wt_results)

# Need to create a data frame with all VSGs represented in each sample
wt_and_aid_results_all_combos <- rbind(AID_results_all_combos, wt_results_all_combos) %>%
  mutate(tissue = case_when(tissue == "Brain" ~ "brain",
                            tissue == "Ear" ~ "ear",
                            tissue == "Gon" ~ "gon",
                            tissue == "Heart" ~ "heart",
                            tissue == "Lung" ~ "lung",
                            tissue == "Sub" ~ "sub",
                            tissue == "gonfat" ~ "gon",
                            tissue == "subcu" ~ "sub", 
                            tissue == "lungs" ~ "lung",
                            tissue == "GonFat" ~ "gon", 
                            tissue == "SubCu" ~ "sub",
                            TRUE ~ tissue)) %>%
  mutate(tissue_final = case_when(tissue == "ear" ~ "skin",
                            tissue == "gon" ~ "gon. fat",
                            tissue == "sub" ~ "s.c. fat",
                            TRUE ~ tissue))

A

wt_and_aid_starting_vsgs <- wt_and_aid_results %>%
  filter(day == "D6", tissue == "blood") %>%
  vsg_max(mouse) %>%
  select(mouse, cluster) %>%
  merge(wt_and_aid_results_all_combos, by = c('mouse','cluster')) %>%
  mutate(Percent = case_when(Percent < .01 ~ 0,
                             TRUE~ Percent)) %>%
  mutate(Percent = Percent + .001) %>%
  mutate(space = case_when(tissue != "blood" ~ "tissues", 
                           TRUE ~ tissue)) %>%
  mutate(strain_space = paste(strain, space, sep = " ")) %>%
  mutate(strain_space = factor(strain_space, levels = c("WT blood","WT tissues","AID blood","AID tissues"))) %>%
  mutate(day = toupper(day))

wt_and_aid_starting_vsg_espression <- wt_and_aid_starting_vsgs %>%
  separate(day, c("day1","day_num"), sep = "D", convert = TRUE, remove = FALSE) %>%
  mutate(day_num_char = as.character(day_num)) %>%
  mutate(day_num_char = factor(day_num_char, levels = c("6","10", "14"))) %>%
  select(-day1) %>% 
  filter(day != "D10") %>% 
  mutate(strain = factor(strain, levels = c("WT", "AID" ))) %>%
  ggplot(aes(x = day_num_char, y = log10(Percent), group = interaction(day_num_char, space))) +
  geom_boxplot(aes(fill = space), alpha = .5, width = .75) +
  geom_point(aes(color = space), size = 2.5, position = position_jitterdodge(jitter.width = .1)) +
  theme_pubr(legend = "right", base_size = 20) +
  xlab("Time post infection (days)") +
  ylab("Percent of parasites\nexpressing the starting VSG") +
  scale_color_manual(values = red_blue) +
  scale_fill_manual(values = red_blue) +
  ylim(-3,2) +
  facet_wrap("strain") +
  scale_y_continuous(labels = c("n.d.", "0.01%","0.1%", "1%", "10%", "100%")) +
  geom_hline(yintercept = -2, linetype = 2) 

wt_and_aid_starting_vsg_espression

ggsave("figures/figure_4A.pdf", wt_and_aid_starting_vsg_espression,
       device = "pdf", width = 8, height = 6, units = "in")

B

wt_and_aid_vsg_counts <- wt_and_aid_results %>% 
  vsg_count(mouse_final, day, tissue, strain) %>% 
  mutate(space = case_when(tissue != "blood" ~ "tissues", 
                           TRUE ~ tissue)) %>%
  separate(day, c("day1","day_num"), sep = "D", convert = TRUE, remove = FALSE) %>%
  mutate(day_num_char = as.character(day_num)) %>%
  mutate(day_num_char = factor(day_num_char, levels = c("6","10", "14"))) %>%
  select(-day1) %>% 
  mutate(strain = factor(strain, levels = c("WT", "AID"))) %>%
  filter(day!= "D10") 

wt_and_aid_counts_norm_test <- wt_and_aid_vsg_counts %>% 
  filter(day == "D14") %>%
  group_by(space, strain) %>%
  shapiro_test(vsg_count)

wt_and_aid_counts_t_test <- wt_and_aid_vsg_counts %>% 
  filter(day == "D14") %>%
 # mutate(test = paste(strain, day, space, sep = "_")) %>%
  group_by(space, day_num_char) %>%
  pairwise_t_test(vsg_count ~ strain, p.adjust.method = "BH") 

wt_and_aid_vsg_count_plot <- wt_and_aid_vsg_counts %>%
  ggplot(aes(x = day_num_char, y = vsg_count, group = interaction(day, space))) + 
  geom_boxplot(aes(fill = space), alpha = .5, width =.75) +
  geom_point(aes(color = space),size = 2.5, position = position_jitterdodge(.1)) + 
  theme_pubr(legend = "right", base_size = 20) +
  xlab("Time post infection (days)") +
  ylab("Number of VSGs") +
  facet_wrap("strain") +
  scale_fill_manual(values = red_blue) +
  scale_color_manual(values = red_blue) + 
  scale_y_continuous(limits = c(0,275), n.breaks = 7)
wt_and_aid_vsg_count_plot

ggsave("figures/figure_4B.pdf", wt_and_aid_vsg_count_plot,
       device = "pdf", width = 8, height = 6, units = "in")

Supplemental figures

S.5

# Loading Elisa data
IgG_dat <- read_csv("data/elisa_data/BZEX44_09012020_IgG_lower_dilution_R.csv") %>%
  mutate(ab = "IgG") 

IgG_dat <- IgG_dat %>%
  separate(Sample.Name, c("day", "mouse", "strain"), sep = "_") %>%
  filter(!str_detect(day, "STD"),
         mouse != "Blank") %>%
  select(mouse, day, strain, ab, Conc_mg.ml) %>%
  group_by(mouse, day, strain, ab) %>% 
  summarise(mean_concentraion_mg_ml = mean(Conc_mg.ml))
  

IgM_dat <- read_csv("data/elisa_data/BZEX44_09042020_IgM_R.csv") %>%
  mutate(ab = "IgM") 

IgM_dat <- IgM_dat %>%
  separate(Sample.Name, c("day", "mouse", "strain"), sep = "_") %>%
  filter(!str_detect(day, "STD")) %>%
  select(mouse, day, strain, ab, Conc_mg.ml) %>%
  group_by(mouse, day, strain, ab) %>% 
  summarise(mean_concentraion_mg_ml = mean(Conc_mg.ml))

# Merge IgG and IgM data
elisa_dat <- rbind(IgG_dat, IgM_dat)

elisa_plot <- elisa_dat %>%
  filter(strain == "WT" | strain == "AID") %>%
  separate(day, c("day1","day_num"), sep = "D", convert = TRUE, remove = FALSE) %>%
  select(-day1) %>%
  mutate(ab = factor(ab, levels = c("IgM", "IgG"))) %>%
  ggplot(aes(x = day_num, y = mean_concentraion_mg_ml, color = ab, fill = ab)) +
  geom_point(position = position_dodge2(1)) +
  theme_pubr(legend = "right", base_size = 20) +
  facet_wrap("strain") +
  stat_summary(aes(group=ab),fun=mean,geom="line", size = 1.2) +
  scale_color_manual(values = red_blue) +
  scale_fill_manual(values = red_blue) +
  scale_x_continuous( breaks = c(0,6,10,14)) + 
  ylab("Concentration of antibody (mg/ml)") +
  xlab("Time post infection (days)")
elisa_plot

ggsave("figures/figure_S5.pdf", elisa_plot,
       device = "pdf", width = 8, height = 6, units = "in")

S.3C

pad1_expression <- read_csv("data/pad1_qpcr_data/pad1_expression.csv") %>%
  mutate(across(day, str_replace, "d","D"),
         across(mouse, str_replace, "m","M"),
         tissue_final = case_when(tissue == "ear" ~ "skin",
                            tissue == "gon" ~ "gon. fat",
                            tissue == "sc" ~ "s.c. fat",
                            TRUE ~ tissue)) %>%
   mutate(mouse_final = case_when(mouse == "M12" ~ "1", 
                                 mouse == "M27" ~ "2",
                                 mouse == "M32" ~ "3",
                                 mouse == "M33" ~ "4",
                                 mouse == "M30" ~ "5",
                                 mouse == "M36" ~ "6",
                                 mouse == "M4" ~ "7",
                                 mouse == "M6" ~ "8",
                                 mouse == "M2" ~ "9",
                                 mouse == "M34" ~ "10",
                                 mouse == "M35" ~ "11",
                                 mouse == "M3" ~ "12")) %>%
  mutate(mouse_final = as.numeric(mouse_final)) %>%
  left_join(counts_per_sample)

vsgs_vs_pad1 <- pad1_expression %>%
  filter(day != "D6") %>%
  mutate(space = case_when(tissue_final == "blood" ~ "blood", tissue_final != "blood" ~ "tissues")) %>%
  ggplot(aes(x = log10(relative_pad1_expression), y = vsg_count)) +
  geom_point(aes(color = tissue_final)) + 
  geom_smooth( method='lm', color = "black", size = .5)+
  stat_cor(aes(label = paste(..rr.label.., ..p.label.., sep = "*`,`~")),
           label.x.npc = "left") +
  theme_pubr(legend = "right", base_size = 12) +
  facet_wrap("space", scales = "fixed") +
  scale_color_manual(values = color_palette_12) +
  ylab("Number of VSGs") +
  xlab("Log10(relative PAD1 expression)")
vsgs_vs_pad1

ggsave("figures/figure_S3C.pdf",vsgs_vs_pad1,
       device = "pdf", width = 8, height = 6, units = "in")

S.6

tsetse_raw_results <- vsg_read("data/tsetse_infection_data/tsetse_all_results.txt")

tsetse_results <- vsg_rename(tsetse_raw_results, "data/tsetse_infection_data/cluster_reference_table.txt") %>%
  filter(mouse != "Undetermined")

tsetse_results <- tsetse_results %>%
  filter(Percent > 0.01) %>%
  mutate(day = factor(day, levels = c("D5", "D10", "D14")))
tsetse_counts_per_sample <- tsetse_results %>%
  vsg_count(day, mouse, tissue) %>%
  mutate(space = case_when(tissue != "blood" ~ "tissues", 
                           TRUE ~ tissue)) 

tsetse_test_of_normality <- tsetse_counts_per_sample %>% 
  filter(day == "D14") %>%
  group_by(space) %>% 
  shapiro_test(vsg_count)

tsetse_counts_by_space_t_test_dat <- tsetse_counts_per_sample %>%
  filter(day == "D14") %>%
  pairwise_t_test(vsg_count ~ space, p.adjust.method = "BH") %>%
  mutate(xmin = 1,
         xmax = 2, 
         y.position = 283) 

tsetse_d14_counts_by_space_plot <- tsetse_counts_per_sample %>%
  filter(day == "D14") %>%
  ggplot(aes(x = space, y = vsg_count, group = interaction(day, space))) + 
  geom_boxplot(aes(fill = space), alpha = .5, width =.75, outlier.size=0) +
  geom_point(aes(color = space),size = 2.5, position = position_jitterdodge(.1)) + 
  theme_pubr(legend = "right", base_size = 20) +
  xlab("") +
  ylab("Number of VSGs") +
  scale_fill_manual(values = color_palette_12) +
  scale_color_manual(values = color_palette_12) +
  add_pvalue(tsetse_counts_by_space_t_test_dat, 
             label = "p.adj.signif",
             xmin = "xmin", 
             xmax = "xmax") +
  scale_y_continuous(limits = c(0,300))
tsetse_d14_counts_by_space_plot

ggsave("figures/figure_S6A.pdf", tsetse_d14_counts_by_space_plot,
       device = "pdf", width = 8, height = 6, units = "in")
tsetse_results_all_combos <- vsg_expand(tsetse_results, "samplename", "cluster") %>%
  separate(samplename, c("mouse", "day", "tissue"), sep = "_", remove = FALSE)

tsetse_starting_vsgs <- tsetse_results %>%
  filter(day  == "D5") %>%
  vsg_max(mouse) %>%
  select(mouse, cluster, mouse) %>%
  merge(tsetse_results_all_combos, by = c('mouse','cluster')) %>%
  mutate(Percent = case_when(Percent < .01 ~ 0,
                             TRUE~ Percent)) %>%
  mutate(Percent = Percent + .001) %>%
  mutate(day = factor(day, levels = c("D5", "D10", "D14"))) 

tsetse_starting_vsg_espression_dat <- tsetse_starting_vsgs %>%
  separate(day, c("day1","day_num"), sep = "D", convert = TRUE, remove = FALSE) %>%
  mutate(day_num_char = as.character(day_num)) %>%
  mutate(day_num_char = factor(day_num_char, levels = c("5","10", "14"))) %>%
  select(-day1) %>% 
  mutate(space = case_when(tissue == "blood" ~ "blood", 
                           TRUE ~ "tissues")) %>%
  mutate(expression = case_when(Percent == .001 ~ "N", 
                                TRUE ~ "Y"),
         day = factor(day_num, levels = c("5", "10", "14"))) 

tsetse_starting_vsg_espression_plot <- tsetse_starting_vsg_espression_dat %>%
  filter(day != 10) %>%
  ggplot(aes(x = day, y = log10(Percent), group = interaction(day, space))) +
  geom_boxplot(aes(fill = space), alpha = .5, width = 1, outlier.shape = NA) +
  geom_point(aes(color = tissue), size = 2.5, position =
               position_jitterdodge(jitter.width = .15, dodge.width = 1)) +
  theme_pubr(legend = "right", base_size = 20) +
  xlab("Time post infection (days)") +
  ylab("Percent of parasites expressing\nthe most abundat day 5 VSG") +
  scale_color_manual(values = color_palette_12) +
  scale_fill_manual(values = color_palette_12) +
  #ylim(-3,3.1) +
  geom_hline(yintercept = -2, linetype = 2) +
  scale_y_continuous(limits = c(-3, 2), labels = c("n.d.", "0.01%","0.1%", 
                                "1.0%", "10%", "100%")) +
  guides(color = guide_legend(title = "Tissue"),
         fill = guide_legend(title = "Space"))
tsetse_starting_vsg_espression_plot

ggsave("figures/figure_S6B.pdf", tsetse_starting_vsg_espression_plot,
       device = "pdf", width = 8, height = 6, units = "in")
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-apple-darwin20 (64-bit)
## Running under: macOS Sonoma 14.6.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] qPCRr_0.0.1.0    ggprism_1.0.4    rstatix_0.7.2    AICcmodavg_2.3-2
##  [5] scales_1.3.0     ggpubr_0.6.0     devtools_2.4.5   usethis_2.2.2   
##  [9] lubridate_1.9.3  forcats_1.0.0    stringr_1.5.1    dplyr_1.1.4     
## [13] purrr_1.0.2      readr_2.1.4      tidyr_1.3.1      tibble_3.2.1    
## [17] ggplot2_3.4.4    tidyverse_2.0.0 
## 
## loaded via a namespace (and not attached):
##  [1] pbapply_1.7-2          remotes_2.4.2.1        readxl_1.4.3          
##  [4] rlang_1.1.3            magrittr_2.0.3         compiler_4.3.1        
##  [7] mgcv_1.8-42            systemfonts_1.0.5      callr_3.7.3           
## [10] vctrs_0.6.5            profvis_0.3.8          pkgconfig_2.0.3       
## [13] crayon_1.5.2           fastmap_1.1.1          backports_1.4.1       
## [16] ellipsis_0.3.2         labeling_0.4.3         utf8_1.2.4            
## [19] promises_1.2.1         rmarkdown_2.25         sessioninfo_1.2.2     
## [22] tzdb_0.4.0             ps_1.7.5               ragg_1.2.6            
## [25] bit_4.0.5              xfun_0.40              cachem_1.0.8          
## [28] jsonlite_1.8.7         later_1.3.1            VGAM_1.1-9            
## [31] broom_1.0.5            parallel_4.3.1         prettyunits_1.2.0     
## [34] R6_2.5.1               bslib_0.5.1            stringi_1.8.3         
## [37] car_3.1-2              pkgload_1.3.3          jquerylib_0.1.4       
## [40] cellranger_1.1.0       Rcpp_1.0.11            knitr_1.44            
## [43] httpuv_1.6.12          Matrix_1.6-1.1         splines_4.3.1         
## [46] timechange_0.2.0       tidyselect_1.2.0       rstudioapi_0.15.0     
## [49] abind_1.4-5            yaml_2.3.7             miniUI_0.1.1.1        
## [52] processx_3.8.2         pkgbuild_1.4.2         lattice_0.21-8        
## [55] shiny_1.7.5.1          withr_3.0.0            evaluate_0.22         
## [58] survival_3.5-5         urlchecker_1.0.1       pillar_1.9.0          
## [61] carData_3.0-5          checkmate_2.3.1        stats4_4.3.1          
## [64] generics_0.1.3         vroom_1.6.4            hms_1.1.3             
## [67] munsell_0.5.0          xtable_1.8-4           glue_1.7.0            
## [70] unmarked_1.3.2         tools_4.3.1            vsgseqtools_0.0.0.9000
## [73] ggsignif_0.6.4         fs_1.6.3               colorspace_2.1-0      
## [76] nlme_3.1-162           cli_3.6.2              textshaping_0.3.7     
## [79] fansi_1.0.6            gtable_0.3.4           sass_0.4.7            
## [82] digest_0.6.33          htmlwidgets_1.6.2      farver_2.1.1          
## [85] memoise_2.0.1          htmltools_0.5.6.1      lifecycle_1.0.4       
## [88] mime_0.12              bit64_4.0.5            MASS_7.3-60
 

By Alex Beaver for the Mugnier Lab

abeaver3@jhmi.edu